from transformers import AutoModelForCausalLM, AutoTokenizer


'''
huggingface-cli download --resume-download Qwen/Qwen3-0.6B --local-dir qwen3_0.6B

'''
# Hub identifier of the model, kept for reference; the weights below are
# loaded from the local snapshot in `model_folder`, not from the Hub.
model_name = "Qwen/Qwen3-0.6B"

# Local directory holding the downloaded Qwen3-0.6B snapshot (see the
# huggingface-cli command above).
model_folder = r'D:\Models\qwen3_0.6b'

# Load tokenizer and model from disk. `torch_dtype="auto"` keeps the
# checkpoint's native precision; `device_map="auto"` lets accelerate place
# the layers on whatever device(s) are available.
tokenizer = AutoTokenizer.from_pretrained(model_folder)
model = AutoModelForCausalLM.from_pretrained(model_folder, torch_dtype="auto", device_map="auto")
print(model)

# Single-turn chat history for the demo.
prompt = "Give me a short introduction to large language model."
messages = [{"role": "user", "content": prompt}]

# Render the conversation through the model's chat template without
# tokenizing. `add_generation_prompt=True` appends the assistant header so
# generation can begin; `enable_thinking=True` selects the Qwen3 "thinking"
# mode (True is already the default for this model family).
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, enable_thinking=True)
print(text)
